Take-Home Exercise 2

A short description of the post.

Darryl Kwok https://example.com/darrylkwok
09-15-2021

Overview

Installing and launching the R packages

packages = c('sp', 'rgdal', 'spNetwork', 'tmap', 'maptools', 'sf', 'raster', 'spatstat', 'tidyverse', 'plotly', 'ggthemes')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}

Data

Import Geospatial Data

train_services_sf <- st_read(dsn="data/geospatial", layer="MRTLRTStnPtt")
Reading layer `MRTLRTStnPtt' from data source 
  `C:\darrylkwok\IS415_blog\_posts\2021-09-15-take-home-exercise-2\data\geospatial' 
  using driver `ESRI Shapefile'
Simple feature collection with 171 features and 3 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 6138.311 ymin: 27555.06 xmax: 45254.86 ymax: 47854.2
Projected CRS: SVY21
mpsz_sf <- st_read(dsn = "data/geospatial", layer = "MP14_SUBZONE_WEB_PL")
Reading layer `MP14_SUBZONE_WEB_PL' from data source 
  `C:\darrylkwok\IS415_blog\_posts\2021-09-15-take-home-exercise-2\data\geospatial' 
  using driver `ESRI Shapefile'
Simple feature collection with 323 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
Projected CRS: SVY21

Geospatial Data Wrangling

Check the detailed information of the geospatial data

st_crs(train_services_sf)
Coordinate Reference System:
  User input: SVY21 
  wkt:
PROJCRS["SVY21",
    BASEGEOGCRS["SVY21[WGS84]",
        DATUM["World Geodetic System 1984",
            ELLIPSOID["WGS 84",6378137,298.257223563,
                LENGTHUNIT["metre",1]],
            ID["EPSG",6326]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["Degree",0.0174532925199433]]],
    CONVERSION["unnamed",
        METHOD["Transverse Mercator",
            ID["EPSG",9807]],
        PARAMETER["Latitude of natural origin",1.36666666666667,
            ANGLEUNIT["Degree",0.0174532925199433],
            ID["EPSG",8801]],
        PARAMETER["Longitude of natural origin",103.833333333333,
            ANGLEUNIT["Degree",0.0174532925199433],
            ID["EPSG",8802]],
        PARAMETER["Scale factor at natural origin",1,
            SCALEUNIT["unity",1],
            ID["EPSG",8805]],
        PARAMETER["False easting",28001.642,
            LENGTHUNIT["metre",1],
            ID["EPSG",8806]],
        PARAMETER["False northing",38744.572,
            LENGTHUNIT["metre",1],
            ID["EPSG",8807]]],
    CS[Cartesian,2],
        AXIS["(E)",east,
            ORDER[1],
            LENGTHUNIT["metre",1,
                ID["EPSG",9001]]],
        AXIS["(N)",north,
            ORDER[2],
            LENGTHUNIT["metre",1,
                ID["EPSG",9001]]]]
st_crs(mpsz_sf)
Coordinate Reference System:
  User input: SVY21 
  wkt:
PROJCRS["SVY21",
    BASEGEOGCRS["SVY21[WGS84]",
        DATUM["World Geodetic System 1984",
            ELLIPSOID["WGS 84",6378137,298.257223563,
                LENGTHUNIT["metre",1]],
            ID["EPSG",6326]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["Degree",0.0174532925199433]]],
    CONVERSION["unnamed",
        METHOD["Transverse Mercator",
            ID["EPSG",9807]],
        PARAMETER["Latitude of natural origin",1.36666666666667,
            ANGLEUNIT["Degree",0.0174532925199433],
            ID["EPSG",8801]],
        PARAMETER["Longitude of natural origin",103.833333333333,
            ANGLEUNIT["Degree",0.0174532925199433],
            ID["EPSG",8802]],
        PARAMETER["Scale factor at natural origin",1,
            SCALEUNIT["unity",1],
            ID["EPSG",8805]],
        PARAMETER["False easting",28001.642,
            LENGTHUNIT["metre",1],
            ID["EPSG",8806]],
        PARAMETER["False northing",38744.572,
            LENGTHUNIT["metre",1],
            ID["EPSG",8807]]],
    CS[Cartesian,2],
        AXIS["(E)",east,
            ORDER[1],
            LENGTHUNIT["metre",1,
                ID["EPSG",9001]]],
        AXIS["(N)",north,
            ORDER[2],
            LENGTHUNIT["metre",1,
                ID["EPSG",9001]]]]

We can see from these 2 st_crs() outputs that, the projections are in SVY21, however the EPSG code is 9001. The correct EPSG code is 3414.

Therefore, we will assign the correct EPSG code.

train_services_sf <- st_set_crs(train_services_sf, 3414)
mpsz_sf <- st_set_crs(mpsz_sf, 3414)

Check the crs of the 2 geospatial data again to confirm the changes

st_crs(train_services_sf)
Coordinate Reference System:
  User input: EPSG:3414 
  wkt:
PROJCRS["SVY21 / Singapore TM",
    BASEGEOGCRS["SVY21",
        DATUM["SVY21",
            ELLIPSOID["WGS 84",6378137,298.257223563,
                LENGTHUNIT["metre",1]]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["degree",0.0174532925199433]],
        ID["EPSG",4757]],
    CONVERSION["Singapore Transverse Mercator",
        METHOD["Transverse Mercator",
            ID["EPSG",9807]],
        PARAMETER["Latitude of natural origin",1.36666666666667,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8801]],
        PARAMETER["Longitude of natural origin",103.833333333333,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8802]],
        PARAMETER["Scale factor at natural origin",1,
            SCALEUNIT["unity",1],
            ID["EPSG",8805]],
        PARAMETER["False easting",28001.642,
            LENGTHUNIT["metre",1],
            ID["EPSG",8806]],
        PARAMETER["False northing",38744.572,
            LENGTHUNIT["metre",1],
            ID["EPSG",8807]]],
    CS[Cartesian,2],
        AXIS["northing (N)",north,
            ORDER[1],
            LENGTHUNIT["metre",1]],
        AXIS["easting (E)",east,
            ORDER[2],
            LENGTHUNIT["metre",1]],
    USAGE[
        SCOPE["Cadastre, engineering survey, topographic mapping."],
        AREA["Singapore - onshore and offshore."],
        BBOX[1.13,103.59,1.47,104.07]],
    ID["EPSG",3414]]
st_crs(mpsz_sf)
Coordinate Reference System:
  User input: EPSG:3414 
  wkt:
PROJCRS["SVY21 / Singapore TM",
    BASEGEOGCRS["SVY21",
        DATUM["SVY21",
            ELLIPSOID["WGS 84",6378137,298.257223563,
                LENGTHUNIT["metre",1]]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["degree",0.0174532925199433]],
        ID["EPSG",4757]],
    CONVERSION["Singapore Transverse Mercator",
        METHOD["Transverse Mercator",
            ID["EPSG",9807]],
        PARAMETER["Latitude of natural origin",1.36666666666667,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8801]],
        PARAMETER["Longitude of natural origin",103.833333333333,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8802]],
        PARAMETER["Scale factor at natural origin",1,
            SCALEUNIT["unity",1],
            ID["EPSG",8805]],
        PARAMETER["False easting",28001.642,
            LENGTHUNIT["metre",1],
            ID["EPSG",8806]],
        PARAMETER["False northing",38744.572,
            LENGTHUNIT["metre",1],
            ID["EPSG",8807]]],
    CS[Cartesian,2],
        AXIS["northing (N)",north,
            ORDER[1],
            LENGTHUNIT["metre",1]],
        AXIS["easting (E)",east,
            ORDER[2],
            LENGTHUNIT["metre",1]],
    USAGE[
        SCOPE["Cadastre, engineering survey, topographic mapping."],
        AREA["Singapore - onshore and offshore."],
        BBOX[1.13,103.59,1.47,104.07]],
    ID["EPSG",3414]]

Check if the geometries are valid for the train_services_sf

length(which(st_is_valid(train_services_sf) == FALSE))
[1] 0

From the above output, we can see that there are no invalid geometries for the train_services_sf

Check if the geometries are valid for the mpsz_sf

length(which(st_is_valid(mpsz_sf) == FALSE))
[1] 9

From the above output, we can see that there are 9 invalid geometries in the mpsz data

Therefore, we will handle the invalid geometries and check if the changes are made.

mpsz_sf <- st_make_valid(mpsz_sf)
length(which(st_is_valid(mpsz_sf) == FALSE))
[1] 0

Visualise the Location of the Train Services on the Singapore Map

tmap_mode('view')

tm_shape(mpsz_sf) + 
  tm_polygons() +
tm_shape(train_services_sf) +
  tm_dots(col="red", size=0.1)
tmap_mode('plot')

Import Aspatial Data

june_2019 <- read_csv("data/aspatial/30062019.csv")
june_2021 <- read_csv("data/aspatial/30062021.csv")
hotels <- read_csv("data/aspatial/hotels.csv")
tourism <- read_csv("data/aspatial/tourism.csv")

Aspatial Data Wrangling

Learn more about each of the aspatial datasets

glimpse(june_2019)
Rows: 8,293
Columns: 16
$ id                             <dbl> 49091, 50646, 56334, 71609, 7~
$ name                           <chr> "COZICOMFORT LONG TERM STAY R~
$ host_id                        <dbl> 266763, 227796, 266763, 36704~
$ host_name                      <chr> "Francesca", "Sujatha", "Fran~
$ neighbourhood_group            <chr> "North Region", "Central Regi~
$ neighbourhood                  <chr> "Woodlands", "Bukit Timah", "~
$ latitude                       <dbl> 1.44255, 1.33235, 1.44246, 1.~
$ longitude                      <dbl> 103.7958, 103.7852, 103.7967,~
$ room_type                      <chr> "Private room", "Private room~
$ price                          <dbl> 81, 80, 68, 200, 92, 102, 203~
$ minimum_nights                 <dbl> 180, 90, 6, 1, 1, 1, 1, 7, 30~
$ number_of_reviews              <dbl> 1, 18, 20, 12, 20, 35, 23, 15~
$ last_review                    <date> 2013-10-21, 2014-12-26, 2015~
$ reviews_per_month              <dbl> 0.01, 0.28, 0.21, 0.13, 0.21,~
$ calculated_host_listings_count <dbl> 2, 1, 2, 9, 9, 9, 9, 1, 4, 4,~
$ availability_365               <dbl> 365, 365, 365, 353, 353, 348,~
glimpse(june_2021)
Rows: 4,238
Columns: 16
$ id                             <dbl> 49091, 50646, 56334, 71609, 7~
$ name                           <chr> "COZICOMFORT LONG TERM STAY R~
$ host_id                        <dbl> 266763, 227796, 266763, 36704~
$ host_name                      <chr> "Francesca", "Sujatha", "Fran~
$ neighbourhood_group            <chr> "North Region", "Central Regi~
$ neighbourhood                  <chr> "Woodlands", "Bukit Timah", "~
$ latitude                       <dbl> 1.44129, 1.33432, 1.44041, 1.~
$ longitude                      <dbl> 103.7957, 103.7852, 103.7967,~
$ room_type                      <chr> "Private room", "Private room~
$ price                          <dbl> 81, 80, 67, 177, 81, 81, 52, ~
$ minimum_nights                 <dbl> 180, 90, 6, 90, 90, 90, 14, 1~
$ number_of_reviews              <dbl> 1, 18, 20, 20, 24, 48, 20, 13~
$ last_review                    <date> 2013-10-21, 2014-07-08, 2015~
$ reviews_per_month              <dbl> 0.01, 0.22, 0.16, 0.29, 0.34,~
$ calculated_host_listings_count <dbl> 2, 1, 2, 4, 4, 4, 50, 50, 7, ~
$ availability_365               <dbl> 365, 365, 365, 365, 365, 365,~
glimpse(hotels)
Rows: 422
Columns: 9
$ NAME              <chr> "Jayleen Clarke Quay Hotel", "JEN Singapor~
$ ADDRESSPOSTALCODE <dbl> 59390, 238858, 249716, 399041, 238485, 399~
$ ADDRESSSTREETNAME <chr> "25 New Bridge Road", "277 Orchard Road , ~
$ HYPERLINK         <chr> "jayleenclarkequay@gmail.com", "singaporeo~
$ TOTALROOMS        <dbl> 20, 499, 565, 42, 81, 33, 17, 634, 56, 451~
$ KEEPERNAME        <chr> "James Federick Chong Kah Yean", "Kuok Oon~
$ Lat               <dbl> 1.288715, 1.300510, 1.304271, 1.311586, 1.~
$ Lng               <dbl> 103.8475, 103.8392, 103.8239, 103.8775, 10~
$ ICON_NAME         <chr> "hotel.gif", "hotel.gif", "hotel.gif", "ho~
glimpse(tourism)
Rows: 107
Columns: 17
$ NAME              <chr> "Chinatown Heritage Centre, Singapore", "T~
$ DESCRIPTION       <chr> "Experience how Singapore’s early Chinese ~
$ ADDRESSSTREETNAME <chr> "48 Pagoda Street", "158 Telok Ayer Street~
$ HYPERLINK         <chr> "http://www.singaporechinatown.com.sg/", "~
$ PHOTOURL          <chr> "www.yoursingapore.com/content/dam/desktop~
$ URL_PATH          <chr> "www.yoursingapore.com/en/see-do-singapore~
$ IMAGE_ALT_TEXT    <chr> "Learn more about local Chinese culture at~
$ PHOTOCREDITS      <chr> "Joel Chua DY", "Joel Chua DY", "Joel Chua~
$ LASTMODIFIED      <dttm> 2015-11-02 02:16:52, 2015-11-02 02:20:57,~
$ LATITUDE          <dbl> 1.283510, 1.280940, 1.310070, 1.277219, 1.~
$ LONGTITUDE        <dbl> 103.8444, 103.8476, 103.8994, 103.8373, 10~
$ META_DESCRIPTION  <chr> "At the Chinatown Heritage Centre, experie~
$ OPENING_HOURS     <chr> "Daily, 9am – 8pm,Last entry at 7pm.*China~
$ Lat               <dbl> 1.283510, 1.280940, 1.310070, 1.277219, 1.~
$ Lng               <dbl> 103.8444, 103.8476, 103.8994, 103.8373, 10~
$ ICON_NAME         <chr> "tourist_spot.gif", "tourist_spot.gif", "t~
$ ADDRESSPOSTALCODE <dbl> NA, NA, NA, 0, NA, NA, NA, NA, NA, NA, NA,~

After using the glimpse() function on all the aspatial data, there are a few observations:

Check if there are rows that have missing values in june_2019 dataframe longtitude or latitude columns

sum(is.na(june_2019$latitude))
[1] 0
sum(is.na(june_2019$longitude))
[1] 0

Check if there are rows that have missing values in june_2021 dataframe longtitude or latitude columns

sum(is.na(june_2021$latitude))
[1] 0
sum(is.na(june_2021$longitude))
[1] 0

Check if there are rows that have missing values in hotels dataframe longtitude or latitude columns

sum(is.na(hotels$Lng))
[1] 0
sum(is.na(hotels$Lat))
[1] 0

Check if there are rows that have missing values in tourism dataframe longtitude or latitude columns

There are 2 geographic coordinates in the tourism dataframe

sum(is.na(tourism$LONGTITUDE))
[1] 1
sum(is.na(tourism$LATITUDE))
[1] 1
sum(is.na(tourism$Lat))
[1] 0
sum(is.na(tourism$Lng))
[1] 0

We can see that the LONGTITUDE and LATITUDE columns contains missing values but the Lat and Lng columns contains no missing values.

Extract the row that contains missing values in the LONGTITUDE and LATITUDE columns

tourism[(is.na(tourism$LONGTITUDE) | tourism$LONGTITUDE=="" | is.na(tourism$LATITUDE) | tourism$LATITUDE == ""), ]
# A tibble: 1 x 17
  NAME   DESCRIPTION   ADDRESSSTREETNAME HYPERLINK PHOTOURL  URL_PATH 
  <chr>  <chr>         <chr>             <chr>     <chr>     <chr>    
1 Cruis~ Watch the Si~ <NA>              <NA>      www.your~ www.your~
# ... with 11 more variables: IMAGE_ALT_TEXT <chr>,
#   PHOTOCREDITS <chr>, LASTMODIFIED <dttm>, LATITUDE <dbl>,
#   LONGTITUDE <dbl>, META_DESCRIPTION <chr>, OPENING_HOURS <chr>,
#   Lat <dbl>, Lng <dbl>, ICON_NAME <chr>, ADDRESSPOSTALCODE <dbl>

Although Crusises from Singapore is a tourism activity, it does not belong to a specific location on the map. Therefore we will remove it from the dataframe. This is because having 0,0 coordinates can many different meanings in Geospatial Data

tourism <- tourism[!(is.na(tourism$LONGTITUDE) | tourism$LONGTITUDE=="" | is.na(tourism$LATITUDE) | tourism$LATITUDE == ""), ]

Check if the row is removed from the dataframe

tourism[(is.na(tourism$LONGTITUDE) | tourism$LONGTITUDE=="" | is.na(tourism$LATITUDE) | tourism$LATITUDE == ""), ]
# A tibble: 0 x 17
# ... with 17 variables: NAME <chr>, DESCRIPTION <chr>,
#   ADDRESSSTREETNAME <chr>, HYPERLINK <chr>, PHOTOURL <chr>,
#   URL_PATH <chr>, IMAGE_ALT_TEXT <chr>, PHOTOCREDITS <chr>,
#   LASTMODIFIED <dttm>, LATITUDE <dbl>, LONGTITUDE <dbl>,
#   META_DESCRIPTION <chr>, OPENING_HOURS <chr>, Lat <dbl>,
#   Lng <dbl>, ICON_NAME <chr>, ADDRESSPOSTALCODE <dbl>

Convert the R Dataframes into sf objects and transform the coordinate projection system

Steps Taken:

june_2019_sf <- st_as_sf(june_2019, coords = c("longitude", "latitude"), crs = 4326) %>%
  st_transform(crs=3414)

june_2021_sf <- st_as_sf(june_2021, coords = c("longitude", "latitude"), crs = 4326) %>%
  st_transform(crs=3414)

hotels_sf <- st_as_sf(hotels, coords = c("Lng", "Lat"), crs = 4326) %>%
  st_transform(crs=3414)

tourism_sf <- st_as_sf(tourism, coords = c("Lng", "Lat"), crs = 4326) %>%
  st_transform(crs=3414)
tmap_mode('view')

tm_shape(mpsz_sf) + 
  tm_polygons() +
tm_shape(tourism_sf) +
  tm_dots(col="red", size=0.1)
tmap_mode('plot')




```{.r .distill-force-highlighting-css}